Data Preprocessing

# Per-customer waste: divide each daily waste weight by the number of
# customers, then record any ratio that comes out missing as 0.
df <- df %>%
  mutate(
    food_waste_p_kg   = food_waste_kg / customers,
    solid_waste_p_kg  = solid_waste_kg / customers,
    liquid_waste_p_kg = liquid_waste_kg / customers
  ) %>%
  mutate(
    across(
      c(food_waste_p_kg, solid_waste_p_kg, liquid_waste_p_kg),
      ~ replace_na(.x, 0)
    )
  )

Additive Multiple Linear Model

Food loss and food waste

library(modeldata)
library(purrr)
library(tidyr)
## Multi-output linear regression -----
# A single lm() call fits every response: the left-hand side of the formula is
# a matrix column, so summary() prints one regression per response.
#### Responses (columns of `outputs`):
# 1. food_loss_kg
# 2. food_waste_kg
# 3. solid_waste_kg
# 4. liquid_waste_kg
## Predictors (columns of the matrix `var.`, hence the "var." prefix on every
## coefficient name):
#   temp_c, humi_p, prcp_mm,
#   tueE, wedE, thuE, friE, satE,
#   container, liquors, sales, halfs

aml_results <- filter(df, !is_closed) %>%
  mutate(
    var. = cbind(
      temp_c, humi_p, prcp_mm,
      tueE, wedE, thuE, friE, satE,
      container, liquors, sales, halfs
    ),
    outputs = cbind(
      food_loss_kg, food_waste_kg,
      solid_waste_kg, liquid_waste_kg
    )
  ) %>%
  lm(outputs ~ var., data = .)
summary(aml_results)
## Response food_loss_kg :
## 
## Call:
## lm(formula = food_loss_kg ~ var., data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7441 -1.0480 -0.1397  0.8429  5.0884 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    6.9092508  1.4216037   4.860 2.96e-06 ***
## var.temp_c     0.0541936  0.0176450   3.071  0.00254 ** 
## var.humi_p     0.0288754  0.0149505   1.931  0.05535 .  
## var.prcp_mm   -0.0879013  0.0804906  -1.092  0.27658    
## var.tueE       0.8251631  0.3772490   2.187  0.03029 *  
## var.wedE      -0.4887517  0.3754927  -1.302  0.19507    
## var.thuE      -0.7736748  0.3629117  -2.132  0.03467 *  
## var.friE      -0.0028376  0.3558794  -0.008  0.99365    
## var.satE       0.3850089  0.3746317   1.028  0.30577    
## var.container -0.9667278  0.3292345  -2.936  0.00385 ** 
## var.liquors    0.0368363  0.1063059   0.347  0.72945    
## var.sales     -0.0009308  0.0011528  -0.807  0.42073    
## var.halfs     -0.0077775  0.0575415  -0.135  0.89267    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.002 on 148 degrees of freedom
## Multiple R-squared:  0.2104, Adjusted R-squared:  0.1464 
## F-statistic: 3.286 on 12 and 148 DF,  p-value: 0.0003111
## 
## 
## Response food_waste_kg :
## 
## Call:
## lm(formula = food_waste_kg ~ var., data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9444 -0.6870 -0.1213  0.5307  3.2825 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.4487861  0.7001179  -2.069   0.0403 *  
## var.temp_c     0.0028126  0.0086899   0.324   0.7466    
## var.humi_p     0.0053388  0.0073629   0.725   0.4695    
## var.prcp_mm   -0.0417248  0.0396404  -1.053   0.2942    
## var.tueE       0.1706132  0.1857893   0.918   0.3599    
## var.wedE      -0.1088752  0.1849244  -0.589   0.5569    
## var.thuE      -0.3865671  0.1787284  -2.163   0.0322 *  
## var.friE       0.1925727  0.1752651   1.099   0.2737    
## var.satE      -0.2594445  0.1845003  -1.406   0.1618    
## var.container  0.2795788  0.1621429   1.724   0.0867 .  
## var.liquors    0.0060825  0.0523540   0.116   0.9077    
## var.sales      0.0039416  0.0005678   6.942 1.13e-10 ***
## var.halfs      0.0691157  0.0283383   2.439   0.0159 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9858 on 148 degrees of freedom
## Multiple R-squared:  0.543,  Adjusted R-squared:  0.506 
## F-statistic: 14.66 on 12 and 148 DF,  p-value: < 2.2e-16
## 
## 
## Response solid_waste_kg :
## 
## Call:
## lm(formula = solid_waste_kg ~ var., data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77317 -0.26285 -0.08542  0.17375  2.23890 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.3393464  0.2891174  -1.174   0.2424    
## var.temp_c    -0.0018981  0.0035885  -0.529   0.5976    
## var.humi_p     0.0010506  0.0030406   0.346   0.7302    
## var.prcp_mm   -0.0173666  0.0163697  -1.061   0.2905    
## var.tueE       0.1472586  0.0767227   1.919   0.0569 .  
## var.wedE      -0.0170184  0.0763655  -0.223   0.8240    
## var.thuE      -0.1422127  0.0738068  -1.927   0.0559 .  
## var.friE       0.0577250  0.0723767   0.798   0.4264    
## var.satE      -0.0972288  0.0761904  -1.276   0.2039    
## var.container -0.0292830  0.0669578  -0.437   0.6625    
## var.liquors    0.0078168  0.0216199   0.362   0.7182    
## var.sales      0.0012807  0.0002345   5.462 1.94e-07 ***
## var.halfs      0.0138583  0.0117024   1.184   0.2382    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4071 on 148 degrees of freedom
## Multiple R-squared:  0.3845, Adjusted R-squared:  0.3346 
## F-statistic: 7.704 on 12 and 148 DF,  p-value: 5.36e-11
## 
## 
## Response liquid_waste_kg :
## 
## Call:
## lm(formula = liquid_waste_kg ~ var., data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.42750 -0.48344 -0.07879  0.44523  1.80789 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.1094396  0.4911865  -2.259  0.02536 *  
## var.temp_c     0.0047107  0.0060966   0.773  0.44095    
## var.humi_p     0.0042882  0.0051656   0.830  0.40780    
## var.prcp_mm   -0.0243582  0.0278108  -0.876  0.38253    
## var.tueE       0.0233546  0.1303455   0.179  0.85805    
## var.wedE      -0.0918567  0.1297387  -0.708  0.48005    
## var.thuE      -0.2443544  0.1253917  -1.949  0.05322 .  
## var.friE       0.1348477  0.1229620   1.097  0.27457    
## var.satE      -0.1622157  0.1294412  -1.253  0.21211    
## var.container  0.3088618  0.1137557   2.715  0.00741 ** 
## var.liquors   -0.0017344  0.0367304  -0.047  0.96240    
## var.sales      0.0026609  0.0003983   6.680 4.52e-10 ***
## var.halfs      0.0552573  0.0198815   2.779  0.00615 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6916 on 148 degrees of freedom
## Multiple R-squared:  0.5535, Adjusted R-squared:  0.5173 
## F-statistic: 15.29 on 12 and 148 DF,  p-value: < 2.2e-16

Coefficients Visualization

library(broom)
library(ggplot2)
# Coefficient table for all responses, without confidence intervals.
model_outputs <- aml_results %>% tidy()

# Coefficient table with confidence intervals. Intercept rows are dropped and
# `term` becomes a factor so the plot lists predictors in this fixed order.
conf_ints <- tidy(aml_results, conf.int = TRUE) %>%
  filter(!term %in% "(Intercept)") %>%
  mutate(
    term = factor(
      term,
      levels = c(
        "var.temp_c", "var.humi_p", "var.prcp_mm",
        "var.tueE", "var.wedE",
        "var.thuE", "var.friE", "var.satE",
        "var.container", "var.liquors",
        "var.sales", "var.halfs"
      )
    )
  )

# Plot coeffs
# One point (estimate) and one error bar (confidence interval) per predictor
# and response. position_dodge() is set on the layers themselves: ggplot() has
# no `position` parameter, so passing it there had no effect.
conf_ints %>% 
  ggplot(aes(x = term,
             y = estimate,
             fill = response)) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high, color = response), 
                width = 0.2, linewidth = 0.5,
                position = position_dodge(width = 0.8)) +
  # Reference line at zero: intervals crossing it include "no effect".
  geom_hline(yintercept = 0, color = "red", linewidth = 0.5) +
  geom_point(aes(color = response), position = position_dodge(width = 0.8)) +
  # Flip so the predictor names run down the vertical axis.
  coord_flip() +
  theme(panel.grid.minor.y = element_line(color = 2,
                                          linewidth = 0.25,
                                          linetype = 1))

Per Customer

## Multi-output linear regression -----
# Same specification as the total-waste model, but with the per-customer
# waste columns as responses.
## Responses (columns of `outputs`):
# 1. food_waste_p_kg
# 2. solid_waste_p_kg
# 3. liquid_waste_p_kg
## Predictors (columns of the matrix `var.`):
#   temp_c, humi_p, prcp_mm,
#   tueE, wedE, thuE, friE, satE,
#   container, liquors, sales, halfs
aml_result_p <- filter(df, !is_closed) %>%
  mutate(
    var. = cbind(
      temp_c, humi_p, prcp_mm,
      tueE, wedE, thuE, friE, satE,
      container, liquors, sales, halfs
    ),
    outputs = cbind(
      food_waste_p_kg,
      solid_waste_p_kg, liquid_waste_p_kg
    )
  ) %>%
  lm(outputs ~ var., data = .)
summary(aml_result_p)
## Response food_waste_p_kg :
## 
## Call:
## lm(formula = food_waste_p_kg ~ var., data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.070808 -0.025440 -0.002235  0.020112  0.146466 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    3.970e-02  2.660e-02   1.492    0.138  
## var.temp_c     1.395e-04  3.301e-04   0.423    0.673  
## var.humi_p     5.093e-05  2.797e-04   0.182    0.856  
## var.prcp_mm   -1.914e-03  1.506e-03  -1.271    0.206  
## var.tueE       5.465e-03  7.058e-03   0.774    0.440  
## var.wedE      -6.143e-03  7.025e-03  -0.874    0.383  
## var.thuE      -7.898e-03  6.790e-03  -1.163    0.247  
## var.friE       8.833e-03  6.658e-03   1.327    0.187  
## var.satE      -1.050e-02  7.009e-03  -1.499    0.136  
## var.container  1.533e-02  6.160e-03   2.488    0.014 *
## var.liquors    1.945e-03  1.989e-03   0.978    0.330  
## var.sales      3.475e-05  2.157e-05   1.611    0.109  
## var.halfs      6.672e-04  1.077e-03   0.620    0.536  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03745 on 148 degrees of freedom
## Multiple R-squared:  0.1602, Adjusted R-squared:  0.09214 
## F-statistic: 2.353 on 12 and 148 DF,  p-value: 0.008537
## 
## 
## Response solid_waste_p_kg :
## 
## Call:
## lm(formula = solid_waste_p_kg ~ var., data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.027111 -0.009757 -0.001349  0.007366  0.101048 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    2.035e-02  1.107e-02   1.838   0.0680 .
## var.temp_c    -4.721e-05  1.374e-04  -0.344   0.7316  
## var.humi_p    -3.825e-05  1.164e-04  -0.329   0.7430  
## var.prcp_mm   -8.164e-04  6.267e-04  -1.303   0.1947  
## var.tueE       5.960e-03  2.937e-03   2.029   0.0442 *
## var.wedE      -2.923e-04  2.924e-03  -0.100   0.9205  
## var.thuE      -3.814e-03  2.826e-03  -1.350   0.1792  
## var.friE       1.786e-03  2.771e-03   0.645   0.5202  
## var.satE      -3.992e-03  2.917e-03  -1.368   0.1732  
## var.container -9.782e-05  2.563e-03  -0.038   0.9696  
## var.liquors    6.219e-04  8.277e-04   0.751   0.4537  
## var.sales      1.336e-05  8.976e-06   1.489   0.1387  
## var.halfs     -2.354e-04  4.480e-04  -0.525   0.6001  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01559 on 148 degrees of freedom
## Multiple R-squared:  0.09039,    Adjusted R-squared:  0.01664 
## F-statistic: 1.226 on 12 and 148 DF,  p-value: 0.2707
## 
## 
## Response liquid_waste_p_kg :
## 
## Call:
## lm(formula = liquid_waste_p_kg ~ var., data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.04658 -0.01829 -0.00111  0.01530  0.07904 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.935e-02  1.875e-02   1.032 0.303893    
## var.temp_c     1.867e-04  2.328e-04   0.802 0.423722    
## var.humi_p     8.918e-05  1.972e-04   0.452 0.651806    
## var.prcp_mm   -1.098e-03  1.062e-03  -1.034 0.302856    
## var.tueE      -4.951e-04  4.977e-03  -0.099 0.920881    
## var.wedE      -5.851e-03  4.953e-03  -1.181 0.239422    
## var.thuE      -4.084e-03  4.787e-03  -0.853 0.394946    
## var.friE       7.047e-03  4.695e-03   1.501 0.135461    
## var.satE      -6.512e-03  4.942e-03  -1.318 0.189633    
## var.container  1.542e-02  4.343e-03   3.551 0.000515 ***
## var.liquors    1.323e-03  1.402e-03   0.943 0.347015    
## var.sales      2.139e-05  1.521e-05   1.406 0.161732    
## var.halfs      9.026e-04  7.591e-04   1.189 0.236315    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02641 on 148 degrees of freedom
## Multiple R-squared:  0.209,  Adjusted R-squared:  0.1449 
## F-statistic:  3.26 on 12 and 148 DF,  p-value: 0.0003427

Coefficients Visualization

library(broom)
library(ggplot2)
# Coefficient table for the per-customer model, without confidence intervals.
model_outputs <- tidy(aml_result_p)
# Coefficient table with confidence intervals, intercept rows dropped.
conf_ints_p <- aml_result_p %>% 
  tidy(., conf.int = TRUE) %>% 
  filter(!term %in% "(Intercept)")
# Fix the display order of the predictors. Left as a character column, `term`
# would be ordered alphabetically by ggplot, unlike the total-waste
# coefficient table, which sets this same order explicitly.
conf_ints_p$term <- factor(conf_ints_p$term, 
                           levels = c("var.temp_c", "var.humi_p", "var.prcp_mm", 
                                      "var.tueE", "var.wedE", 
                                      "var.thuE", "var.friE", "var.satE",
                                      "var.container", "var.liquors",
                                      "var.sales", "var.halfs"))

# Plot coeffs
# One point (estimate) and one error bar (confidence interval) per predictor
# and per-customer response. position_dodge() is set on the layers themselves:
# ggplot() has no `position` parameter, so passing it there had no effect.
conf_ints_p %>% 
  ggplot(aes(x = term,
             y = estimate,
             fill = response)) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high, color = response), 
                width = 0.2, linewidth = 0.5,
                position = position_dodge(width = 0.8)) +
  # Reference line at zero: intervals crossing it include "no effect".
  geom_hline(yintercept = 0, color = "red", linewidth = 0.5) +
  geom_point(aes(color = response), position = position_dodge(width = 0.8)) +
  # Flip so the predictor names run down the vertical axis.
  coord_flip()

RDiT

Scatter plot

library(moderndive)
library(ggplot2)
### Color Legend ----
# blue lines: no interaction -> parallel slopes, one line per container group
# red lines: with interaction -> a separately fitted line per container group
# black dashed line: overall linear trend over all plotted days

#' Scatter plot of one waste measure over time with three linear fits
#'
#' Drops closed days, plots `y_col` against the date and overlays (1) an
#' overall dashed linear fit, (2) parallel-slope fits per `container` group
#' and (3) independent linear fits per `container` group.
#'
#' @param data Data frame with columns `is_closed`, `date`, `container` and
#'   the column named by `y_col`.
#' @param y_col Name (string) of the column plotted on the y axis.
#' @param y_label Y-axis label.
#' @param title Plot title.
#' @return A ggplot object.
plot_container_effect <- function(data, y_col, y_label, title) {
  data %>%
    filter(is_closed %in% FALSE) %>%
    ggplot(aes(x = as.Date(date), y = .data[[y_col]])) +
    geom_point() +
    stat_smooth(aes(color = "Overall"), method = "lm", formula = y ~ x,
                linetype = "dashed", se = FALSE) +
    geom_parallel_slopes(aes(group = container, color = "No interaction"),
                         se = FALSE) +
    stat_smooth(aes(group = container, color = "interaction"),
                method = "lm", formula = y ~ x, se = FALSE) +
    scale_x_date(date_labels = "%b %d") +
    scale_color_manual(name = "Model Type",
                       breaks = c("Overall", "No interaction", "interaction"),
                       values = c("Overall" = "black",
                                  "No interaction" = "blue",
                                  "interaction" = "red")) +
    theme(legend.position = "right") +
    xlab("Date") + ylab(y_label) +
    ggtitle(title)
}

# Daily Plot on food waste -----------------------------------------------
daily_waste <- plot_container_effect(
  df, "food_waste_kg",
  y_label = "Daily Food Waste (kg)",
  title = "Container Charge Effect on Food Waste"
)
daily_waste

# Daily Plot on solid food waste -----------------------------------------
daily_solid_waste <- plot_container_effect(
  df, "solid_waste_kg",
  y_label = "Daily Solid Food Waste (kg)",
  title = "Container Charge Effect on Solid Food Waste"
)
daily_solid_waste

# Daily Plot on liquid food waste ----------------------------------------
daily_liquid_waste <- plot_container_effect(
  df, "liquid_waste_kg",
  y_label = "Daily Liquid Food Waste (kg)",
  title = "Container Charge Effect on Liquid Food Waste"
)
daily_liquid_waste

# grid.arrange(daily_waste,
#              daily_solid_waste,daily_liquid_waste)

Scatter plot per Customer

library(moderndive)
# Daily Plot on food waste -----------------------------------------------
daily_waste_p <- plot_container_effect(
  df, "food_waste_p_kg",
  y_label = "Food Waste per Customer (kg)",
  title = "Container Charge Effect on Food Waste per Customer"
)
daily_waste_p

# Daily Plot on solid food waste -----------------------------------------
daily_solid_waste_p <- plot_container_effect(
  df, "solid_waste_p_kg",
  y_label = "Solid Food Waste per Customer (kg)",
  title = "Container Charge Effect on Solid Food Waste per Customer"
)
daily_solid_waste_p

# Daily Plot on liquid food waste ----------------------------------------
daily_liquid_waste_p <- plot_container_effect(
  df, "liquid_waste_p_kg",
  y_label = "Liquid Food Waste per Customer (kg)",
  title = "Container Charge Effect on Liquid Food Waste per Customer"
)
daily_liquid_waste_p

# grid.arrange(daily_loss_waste,daily_loss, daily_waste,
#              daily_solid_waste,daily_liquid_waste)

RDiT Analysis

library(dplyr)
# Keep only open days and number them 1, 2, ... in their current row order.
# row_number() counts the rows that survive the filter, so the index always
# matches the filtered data, even if is_closed contains NA.
df <- df %>%
  filter(is_closed %in% FALSE) %>%
  mutate(time = row_number())

# Running index of the cutoff day (2023-01-03).
cutoff <- df %>%
  filter(date %in% as.Date("2023-01-03")) %>%
  pull(time) %>%
  as.numeric()
# Centring needs exactly one cutoff row; fail with a clear message otherwise.
stopifnot(
  "cutoff date 2023-01-03 must match exactly one open day" =
    length(cutoff) == 1L
)

# Centre the running index so that time == 0 on the cutoff day.
df <- df %>% mutate(time = time - cutoff)
Interaction
# Interaction model: total food waste -----
# container * time expands to container + time + container:time.
rdt_int_fw <- food_waste_kg ~ container * time
# The formula object above is replaced by the fitted model of the same name.
rdt_int_fw <- filter(df, !is_closed) %>%
  lm(rdt_int_fw, data = .)
summary(rdt_int_fw)
## 
## Call:
## lm(formula = rdt_int_fw, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3728 -1.0750 -0.1146  0.7660  3.9877 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.640999   0.303450   5.408 2.34e-07 ***
## container       0.451440   0.440216   1.025   0.3067    
## time           -0.013091   0.006059  -2.161   0.0322 *  
## container:time  0.014899   0.009594   1.553   0.1225    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.395 on 157 degrees of freedom
## Multiple R-squared:  0.02956,    Adjusted R-squared:  0.01101 
## F-statistic: 1.594 on 3 and 157 DF,  p-value: 0.1931
#####
# summary(rdt_fw <- lm(formula =  rdt_fw, 
#                      data = df, subset = (!df$is_closed)))

# Interaction model: solid food waste -----
# container * time expands to container + time + container:time.
rdt_int_sfw <- solid_waste_kg ~ container * time
# The formula object above is replaced by the fitted model of the same name.
rdt_int_sfw <- filter(df, !is_closed) %>%
  lm(rdt_int_sfw, data = .)
summary(rdt_int_sfw)
## 
## Call:
## lm(formula = rdt_int_sfw, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7752 -0.3082 -0.1020  0.2648  2.1840 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.627004   0.107778   5.818 3.25e-08 ***
## container       0.002645   0.156354   0.017    0.987    
## time           -0.003309   0.002152  -1.538    0.126    
## container:time  0.003462   0.003408   1.016    0.311    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4954 on 157 degrees of freedom
## Multiple R-squared:  0.03307,    Adjusted R-squared:  0.01459 
## F-statistic:  1.79 on 3 and 157 DF,  p-value: 0.1514
# Interaction model: liquid food waste -----
# container * time expands to container + time + container:time.
rdt_int_lfw <- liquid_waste_kg ~ container * time
# The formula object above is replaced by the fitted model of the same name.
rdt_int_lfw <- filter(df, !is_closed) %>%
  lm(rdt_int_lfw, data = .)
summary(rdt_int_lfw)
## 
## Call:
## lm(formula = rdt_int_lfw, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.59768 -0.82269 -0.00855  0.56427  2.82840 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.013995   0.214848   4.720 5.19e-06 ***
## container       0.448795   0.311680   1.440   0.1519    
## time           -0.009783   0.004290  -2.280   0.0239 *  
## container:time  0.011437   0.006793   1.684   0.0942 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9875 on 157 degrees of freedom
## Multiple R-squared:  0.03439,    Adjusted R-squared:  0.01594 
## F-statistic: 1.864 on 3 and 157 DF,  p-value: 0.1379

Assumption Checks: Interaction Model

  1. Linearity of the relationships between the dependent and independent variables
  2. Normality of the residuals
  3. Homoscedasticity of the residuals
  4. No influential points (outliers)
  5. No multicollinearity
  6. Independence of the observations
library(performance)
# Diagnostic plots for the three interaction models. Each result is indexed by
# position ([[2]] ... [[6]]) in the sections below.
# NOTE(review): the position of each diagnostic in these lists presumably
# depends on the set and order of checks check_model() produces - confirm the
# indices still match their titles after upgrading the performance package.
ass_int_fw  <- plot(check_model(rdt_int_fw, detrend=FALSE, panel = FALSE))
ass_int_sfw <- plot(check_model(rdt_int_sfw, detrend=FALSE,panel = FALSE))
ass_int_lfw <- plot(check_model(rdt_int_lfw, detrend=FALSE,panel = FALSE))

# 1. Linearity of the relationships between the dependent and independent variables
# 1.1 plot residual vs fitted values
# The default title and subtitle are replaced; subtitle = "" blanks it out.
ass_int_fw[[2]]  + labs(title = "Linearity: Food Waste", subtitle = "")

ass_int_sfw[[2]] + labs(title = "Linearity: Solid Food Waste", subtitle = "")

ass_int_lfw[[2]] + labs(title = "Linearity: Liquid Food Waste", subtitle = "")

# 1.2 test for non-constant error variance
# (these calls test heteroscedasticity; they are not a linearity test)
check_heteroscedasticity(rdt_int_fw)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.006).
check_heteroscedasticity(rdt_int_sfw)
## Warning: Heteroscedasticity (non-constant error variance) detected (p < .001).
check_heteroscedasticity(rdt_int_lfw)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.023).
# 2. Normality of the residuals
# 2.1 density plot of residuals (type = "density")
# Normality of Residuals: Food Waste
plot(check_normality(rdt_int_fw), type = "density") + 
  labs(title = "Normality of Residuals: Food Waste", subtitle = "")

# Normality of Residuals: Solid Food Waste
plot(check_normality(rdt_int_sfw), type = "density") + 
  labs(title = "Normality of Residuals: Solid Food Waste", subtitle = "") 

# Normality of Residuals: Liquid Food Waste
plot(check_normality(rdt_int_lfw), type = "density") + 
  labs(title = "Normality of Residuals: Liquid Food Waste", subtitle = "") 

# 2.2 Normality of Residuals
# QQ plots, taken by position from the check_model() plot lists.
ass_int_fw[[6]]  + labs(title = "QQ Plot of Residuals: Food Waste", subtitle = "")

ass_int_sfw[[6]] + labs(title = "QQ Plot of Residuals: Solid Food Waste", subtitle = "")

ass_int_lfw[[6]] + labs(title = "QQ Plot of Residuals: Liquid Food Waste", subtitle = "")

# 2.3 shapiro-wilk normality test
# All three interaction models are flagged as non-normal (output below).
check_normality(rdt_int_fw)
## Warning: Non-normality of residuals detected (p < .001).
check_normality(rdt_int_sfw)
## Warning: Non-normality of residuals detected (p < .001).
check_normality(rdt_int_lfw)
## Warning: Non-normality of residuals detected (p < .001).
# 3. Homoscedasticity of the residuals
# 3.1 plot residuals
# Taken by position from the check_model() plot lists.
ass_int_fw[[3]]  + labs(title = "Homoscedasticity: Food Waste", subtitle = "")

ass_int_sfw[[3]] + labs(title = "Homoscedasticity: Solid Food Waste", subtitle = "")

ass_int_lfw[[3]] + labs(title = "Homoscedasticity: Liquid Food Waste", subtitle = "")

# 3.2 Breusch-Pagan test
# NOTE(review): the studentized test below gives p-values above 0.05 for all
# three models (0.050, 0.140, 0.062), whereas check_heteroscedasticity() in
# step 1.2 reported p < 0.05 for the same models - reconcile before reporting.
lmtest::bptest(rdt_int_fw)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_int_fw
## BP = 7.8124, df = 3, p-value = 0.05005
lmtest::bptest(rdt_int_sfw)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_int_sfw
## BP = 5.4746, df = 3, p-value = 0.1402
lmtest::bptest(rdt_int_lfw)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_int_lfw
## BP = 7.3214, df = 3, p-value = 0.06233
# 4. No influential points (outliers)
# Taken by position from the check_model() plot lists.
ass_int_fw[[4]]  + labs(title = "Outliers: Food Waste", subtitle = "")

ass_int_sfw[[4]] + labs(title = "Outliers: Solid Food Waste", subtitle = "")

ass_int_lfw[[4]] + labs(title = "Outliers: Liquid Food Waste", subtitle = "")

# 5. No multicollinearity
# Variance inflation factor plots, again selected by position.
ass_int_fw[[5]]  + labs(title = "VIF: Food Waste", subtitle = "")

ass_int_sfw[[5]] + labs(title = "VIF: Solid Food Waste", subtitle = "")

ass_int_lfw[[5]] + labs(title = "VIF: Liquid Food Waste", subtitle = "")

# 6. Independence of the observations
# Autocorrelation
# No autocorrelation is flagged for any of the three models (output below).
check_autocorrelation(rdt_int_fw)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.522).
check_autocorrelation(rdt_int_sfw)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.550).
check_autocorrelation(rdt_int_lfw)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.494).
Multiple model
# Multiple model: total food waste -----
# Interaction specification (container * time) plus the weather and sales
# covariates as additive terms.
rdt_multi_fw <- food_waste_kg ~ container * time +
  temp_c + humi_p + prcp_mm +
  liquors + sales + halfs
# The formula object above is replaced by the fitted model of the same name.
rdt_multi_fw <- filter(df, !is_closed) %>%
  lm(rdt_multi_fw, data = .)
summary(rdt_multi_fw)
## 
## Call:
## lm(formula = rdt_multi_fw, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0485 -0.6500 -0.0912  0.4476  3.3391 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -2.2312704  0.9452769  -2.360   0.0195 *  
## container       0.5782882  0.3674976   1.574   0.1177    
## time           -0.0083069  0.0065926  -1.260   0.2096    
## temp_c         -0.0105319  0.0126725  -0.831   0.4072    
## humi_p          0.0091364  0.0093094   0.981   0.3280    
## prcp_mm        -0.0393178  0.0405061  -0.971   0.3333    
## liquors        -0.0103638  0.0509853  -0.203   0.8392    
## sales           0.0040949  0.0005081   8.059 2.17e-13 ***
## halfs           0.0715002  0.0294293   2.430   0.0163 *  
## container:time  0.0093167  0.0100585   0.926   0.3558    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.008 on 151 degrees of freedom
## Multiple R-squared:  0.5122, Adjusted R-squared:  0.4832 
## F-statistic: 17.62 on 9 and 151 DF,  p-value: < 2.2e-16
# Multiple model: solid food waste -----
# Interaction specification (container * time) plus the weather and sales
# covariates as additive terms.
rdt_multi_sfw <- solid_waste_kg ~ container * time +
  temp_c + humi_p + prcp_mm +
  liquors + sales + halfs

# The formula object above is replaced by the fitted model of the same name.
rdt_multi_sfw <- filter(df, !is_closed) %>%
  lm(rdt_multi_sfw, data = .)
summary(rdt_multi_sfw)
## 
## Call:
## lm(formula = rdt_multi_sfw, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.67954 -0.25736 -0.07937  0.18700  2.32548 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -0.4376693  0.3876971  -1.129    0.261    
## container       0.1019002  0.1507259   0.676    0.500    
## time           -0.0025510  0.0027039  -0.943    0.347    
## temp_c         -0.0056240  0.0051975  -1.082    0.281    
## humi_p          0.0014782  0.0038182   0.387    0.699    
## prcp_mm        -0.0154605  0.0166132  -0.931    0.354    
## liquors         0.0080184  0.0209112   0.383    0.702    
## sales           0.0012142  0.0002084   5.826 3.31e-08 ***
## halfs           0.0130254  0.0120702   1.079    0.282    
## container:time  0.0016192  0.0041254   0.392    0.695    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4135 on 151 degrees of freedom
## Multiple R-squared:  0.3519, Adjusted R-squared:  0.3133 
## F-statistic: 9.112 on 9 and 151 DF,  p-value: 5.999e-11
# Multiple model: liquid food waste -----
# Interaction specification (container * time) plus the weather and sales
# covariates as additive terms.
rdt_multi_lfw <- liquid_waste_kg ~ container * time +
  temp_c + humi_p + prcp_mm +
  liquors + sales + halfs
# The formula object above is replaced by the fitted model of the same name.
rdt_multi_lfw <- filter(df, !is_closed) %>%
  lm(rdt_multi_lfw, data = .)
summary(rdt_multi_lfw)
## 
## Call:
## lm(formula = rdt_multi_lfw, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.55792 -0.44014 -0.07213  0.38966  1.87255 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -1.7936011  0.6627069  -2.706  0.00758 ** 
## container       0.4763880  0.2576422   1.849  0.06641 .  
## time           -0.0057559  0.0046219  -1.245  0.21493    
## temp_c         -0.0049079  0.0088843  -0.552  0.58148    
## humi_p          0.0076581  0.0065265   1.173  0.24249    
## prcp_mm        -0.0238573  0.0283977  -0.840  0.40217    
## liquors        -0.0183822  0.0357444  -0.514  0.60782    
## sales           0.0028807  0.0003562   8.086 1.85e-13 ***
## halfs           0.0584748  0.0206321   2.834  0.00522 ** 
## container:time  0.0076975  0.0070517   1.092  0.27676    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7069 on 151 degrees of freedom
## Multiple R-squared:  0.5242, Adjusted R-squared:  0.4958 
## F-statistic: 18.48 on 9 and 151 DF,  p-value: < 2.2e-16

Assumption Checks: Multiple Model

  1. Linearity of the relationships between the dependent and independent variables
  2. Normality of the residuals
  3. Homoscedasticity of the residuals
  4. No influential points (outliers)
  5. No multicollinearity
  6. Independence of the observations
library(performance)
# Diagnostic plots for the three multiple-regression models. Each result is
# indexed by position in the sections below.
# NOTE(review): the position of each diagnostic in these lists presumably
# depends on the set and order of checks check_model() produces - confirm the
# indices still match their titles after upgrading the performance package.
ass_multi_fw  <- plot(check_model(rdt_multi_fw, detrend=FALSE,panel = FALSE))
ass_multi_sfw <- plot(check_model(rdt_multi_sfw, detrend=FALSE,panel = FALSE))
ass_multi_lfw <- plot(check_model(rdt_multi_lfw, detrend=FALSE,panel = FALSE))

# 1. Linearity of the relationships between the dependent and independent variables
# 1.1 plot residual vs fitted values
# The default title and subtitle are replaced; subtitle = "" blanks it out.
ass_multi_fw[[2]]  + labs(title = "Linearity: Food Waste", subtitle = "")

ass_multi_sfw[[2]] + labs(title = "Linearity: Solid Food Waste", subtitle = "")

ass_multi_lfw[[2]] + labs(title = "Linearity: Liquid Food Waste", subtitle = "")

# 1.2 test for non-constant error variance
# (these calls test heteroscedasticity; they are not a linearity test)
check_heteroscedasticity(rdt_multi_fw)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.008).
check_heteroscedasticity(rdt_multi_sfw)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.006).
check_heteroscedasticity(rdt_multi_lfw)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.003).
# 2. Normality of the residuals
# 2.1 density plot of residuals
# Normality of Residuals: Food Waste
plot(
  check_normality(rdt_multi_fw),
  type = "density"
) +
  labs(title = "Normality of Residuals: Food Waste", subtitle = "")

# Normality of Residuals: Solid Food Waste
plot(
  check_normality(rdt_multi_sfw),
  type = "density"
) +
  labs(title = "Normality of Residuals: Solid Food Waste", subtitle = "")

# Normality of Residuals: Liquid Food Waste
plot(
  check_normality(rdt_multi_lfw),
  type = "density"
) +
  labs(title = "Normality of Residuals: Liquid Food Waste", subtitle = "")

# 2.2 QQ plots of residuals (sixth check_model panel)
ass_multi_fw[[6]] +
  labs(title = "QQ Plot of Residuals: Food Waste", subtitle = "")

ass_multi_sfw[[6]] +
  labs(title = "QQ Plot of Residuals: Solid Food Waste", subtitle = "")

ass_multi_lfw[[6]] +
  labs(title = "QQ Plot of Residuals: Liquid Food Waste", subtitle = "")

# 2.3 shapiro-wilk normality test
check_normality(rdt_multi_fw)
## Warning: Non-normality of residuals detected (p = 0.001).
check_normality(rdt_multi_sfw)
## Warning: Non-normality of residuals detected (p < .001).
check_normality(rdt_multi_lfw)
## Warning: Non-normality of residuals detected (p = 0.023).
# 3. Homoscedasticity of the residuals
# 3.1 residual spread plots (third check_model panel)
ass_multi_fw[[3]] +
  labs(title = "Homoscedasticity: Food Waste", subtitle = "")

ass_multi_sfw[[3]] +
  labs(title = "Homoscedasticity: Solid Food Waste", subtitle = "")

ass_multi_lfw[[3]] +
  labs(title = "Homoscedasticity: Liquid Food Waste", subtitle = "")

# 3.2 Breusch-Pagan test (studentized, per the printed test name)
lmtest::bptest(rdt_multi_fw)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_multi_fw
## BP = 14.705, df = 9, p-value = 0.09935
lmtest::bptest(rdt_multi_sfw)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_multi_sfw
## BP = 10.062, df = 9, p-value = 0.3455
lmtest::bptest(rdt_multi_lfw)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_multi_lfw
## BP = 15.294, df = 9, p-value = 0.08316
# 4. No influential points (outliers) - fourth check_model panel
ass_multi_fw[[4]] +
  labs(title = "Outliers: Food Waste", subtitle = "")

ass_multi_sfw[[4]] +
  labs(title = "Outliers: Solid Food Waste", subtitle = "")

ass_multi_lfw[[4]] +
  labs(title = "Outliers: Liquid Food Waste", subtitle = "")

# 5. No multicollinearity - fifth check_model panel (VIF)
ass_multi_fw[[5]] +
  labs(title = "VIF: Food Waste", subtitle = "")

ass_multi_sfw[[5]] +
  labs(title = "VIF: Solid Food Waste", subtitle = "")

ass_multi_lfw[[5]] +
  labs(title = "VIF: Liquid Food Waste", subtitle = "")

# 6. Independence of the observations
# Autocorrelation of the residuals
check_autocorrelation(rdt_multi_fw)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.474).
check_autocorrelation(rdt_multi_sfw)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.500).
check_autocorrelation(rdt_multi_lfw)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.470).
Polynomial model
# poly- food waste -----
# Formula: food_waste_kg on container crossed with both time and I(time^2),
# plus temp_c, humi_p, prcp_mm, liquors, sales and halfs as additive terms.
rdt_poly_fw <-
  food_waste_kg ~ container * time +
    container * I(time^2) +
    temp_c + humi_p + prcp_mm +
    liquors + sales + halfs

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_poly_fw <- df %>%
  filter(!is_closed) %>%
  lm(rdt_poly_fw, data = .)

summary(rdt_poly_fw)
## 
## Call:
## lm(formula = rdt_poly_fw, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0072 -0.6418 -0.1261  0.4642  3.2952 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -2.4671604  1.0125798  -2.437   0.0160 *  
## container            0.3838594  0.5208439   0.737   0.4623    
## time                -0.0189069  0.0186297  -1.015   0.3118    
## I(time^2)           -0.0001330  0.0002020  -0.658   0.5114    
## temp_c              -0.0083476  0.0127768  -0.653   0.5145    
## humi_p               0.0103180  0.0093920   1.099   0.2737    
## prcp_mm             -0.0513615  0.0414043  -1.240   0.2167    
## liquors             -0.0068301  0.0510638  -0.134   0.8938    
## sales                0.0041346  0.0005102   8.104 1.78e-13 ***
## halfs                0.0703055  0.0296094   2.374   0.0188 *  
## container:time       0.0469042  0.0293172   1.600   0.1117    
## container:I(time^2) -0.0002283  0.0003461  -0.660   0.5106    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.008 on 149 degrees of freedom
## Multiple R-squared:  0.5186, Adjusted R-squared:  0.4831 
## F-statistic: 14.59 on 11 and 149 DF,  p-value: < 2.2e-16
# poly- solid food waste -----
# Formula: solid_waste_kg on container crossed with both time and I(time^2),
# plus temp_c, humi_p, prcp_mm, liquors, sales and halfs as additive terms.
rdt_poly_sfw <-
  solid_waste_kg ~ container * time +
    container * I(time^2) +
    temp_c + humi_p + prcp_mm +
    liquors + sales + halfs

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_poly_sfw <- df %>%
  filter(!is_closed) %>%
  lm(rdt_poly_sfw, data = .)

summary(rdt_poly_sfw)
## 
## Call:
## lm(formula = rdt_poly_sfw, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69124 -0.24652 -0.06983  0.18138  2.25175 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -7.004e-01  4.129e-01  -1.696   0.0919 .  
## container            1.862e-01  2.124e-01   0.877   0.3821    
## time                -1.498e-02  7.596e-03  -1.971   0.0505 .  
## I(time^2)           -1.467e-04  8.237e-05  -1.781   0.0770 .  
## temp_c              -4.899e-03  5.210e-03  -0.940   0.3486    
## humi_p               2.446e-03  3.830e-03   0.639   0.5240    
## prcp_mm             -2.077e-02  1.688e-02  -1.230   0.2206    
## liquors              1.013e-02  2.082e-02   0.487   0.6272    
## sales                1.249e-03  2.080e-04   6.005  1.4e-08 ***
## halfs                1.095e-02  1.207e-02   0.907   0.3658    
## container:time       2.138e-02  1.195e-02   1.788   0.0758 .  
## container:I(time^2)  5.136e-05  1.411e-04   0.364   0.7164    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4112 on 149 degrees of freedom
## Multiple R-squared:  0.3679, Adjusted R-squared:  0.3212 
## F-statistic: 7.884 on 11 and 149 DF,  p-value: 1.063e-10
# poly- liquid food waste -----
# Formula: liquid_waste_kg on container crossed with both time and I(time^2),
# plus temp_c, humi_p, prcp_mm, liquors, sales and halfs as additive terms.
rdt_poly_lfw <-
  liquid_waste_kg ~ container * time +
    container * I(time^2) +
    temp_c + humi_p + prcp_mm +
    liquors + sales + halfs

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_poly_lfw <- df %>%
  filter(!is_closed) %>%
  lm(rdt_poly_lfw, data = .)

summary(rdt_poly_lfw)
## 
## Call:
## lm(formula = rdt_poly_lfw, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.53737 -0.42153 -0.08896  0.35757  1.87042 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.7668065  0.7103879  -2.487  0.01398 *  
## container            0.1976864  0.3654045   0.541  0.58931    
## time                -0.0039315  0.0130699  -0.301  0.76398    
## I(time^2)            0.0000137  0.0001417   0.097  0.92311    
## temp_c              -0.0034488  0.0089637  -0.385  0.70097    
## humi_p               0.0078720  0.0065890   1.195  0.23410    
## prcp_mm             -0.0305946  0.0290477  -1.053  0.29393    
## liquors             -0.0169643  0.0358244  -0.474  0.63652    
## sales                0.0028854  0.0003579   8.061 2.27e-13 ***
## halfs                0.0593525  0.0207728   2.857  0.00489 ** 
## container:time       0.0255287  0.0205679   1.241  0.21649    
## container:I(time^2) -0.0002796  0.0002428  -1.152  0.25130    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7074 on 149 degrees of freedom
## Multiple R-squared:  0.5297, Adjusted R-squared:  0.495 
## F-statistic: 15.26 on 11 and 149 DF,  p-value: < 2.2e-16

Assumption checks: Polynomial model

  1. Linearity of the relationships between the dependent and independent variables
  2. Normality of the residuals
  3. Homoscedasticity of the residuals
  4. No influential points (outliers)
  5. No multicollinearity
  6. Independence of the observations
library(performance)
# Diagnostic panels for the three polynomial fits. Each result is indexed
# with [[ ]] further down to show one panel at a time.
ass_poly_fw <- plot(
  check_model(rdt_poly_fw, detrend = FALSE, panel = FALSE)
)
ass_poly_sfw <- plot(
  check_model(rdt_poly_sfw, detrend = FALSE, panel = FALSE)
)
ass_poly_lfw <- plot(
  check_model(rdt_poly_lfw, detrend = FALSE, panel = FALSE)
)

# 1. Linearity of the relationships between the dependent and independent variables
# 1.1 residuals vs fitted values
ass_poly_fw[[2]] +
  labs(title = "Linearity: Food Waste", subtitle = "")

ass_poly_sfw[[2]] +
  labs(title = "Linearity: Solid Food Waste", subtitle = "")

ass_poly_lfw[[2]] +
  labs(title = "Linearity: Liquid Food Waste", subtitle = "")

# 1.2 error-variance (heteroscedasticity) check
# NOTE(review): these warnings disagree with the studentized Breusch-Pagan
# tests under 3.2 (all p > 0.05); presumably check_heteroscedasticity() runs
# a different (non-studentized) variant of the test - confirm before reporting.
check_heteroscedasticity(rdt_poly_fw)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.013).
check_heteroscedasticity(rdt_poly_sfw)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.003).
check_heteroscedasticity(rdt_poly_lfw)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.007).
# 2. Normality of the residuals
# 2.1 density plot of residuals
# Normality of Residuals: Food Waste
plot(
  check_normality(rdt_poly_fw),
  type = "density"
) +
  labs(title = "Normality of Residuals: Food Waste", subtitle = "")

# Normality of Residuals: Solid Food Waste
plot(
  check_normality(rdt_poly_sfw),
  type = "density"
) +
  labs(title = "Normality of Residuals: Solid Food Waste", subtitle = "")

# Normality of Residuals: Liquid Food Waste
plot(
  check_normality(rdt_poly_lfw),
  type = "density"
) +
  labs(title = "Normality of Residuals: Liquid Food Waste", subtitle = "")

# 2.2 QQ plots of residuals (sixth check_model panel)
ass_poly_fw[[6]] +
  labs(title = "QQ Plot of Residuals: Food Waste", subtitle = "")

ass_poly_sfw[[6]] +
  labs(title = "QQ Plot of Residuals: Solid Food Waste", subtitle = "")

ass_poly_lfw[[6]] +
  labs(title = "QQ Plot of Residuals: Liquid Food Waste", subtitle = "")

# 2.3 shapiro-wilk normality test
check_normality(rdt_poly_fw)
## Warning: Non-normality of residuals detected (p < .001).
check_normality(rdt_poly_sfw)
## Warning: Non-normality of residuals detected (p < .001).
check_normality(rdt_poly_lfw)
## Warning: Non-normality of residuals detected (p = 0.013).
# 3. Homoscedasticity of the residuals
# 3.1 residual spread plots (third check_model panel)
ass_poly_fw[[3]] +
  labs(title = "Homoscedasticity: Food Waste", subtitle = "")

ass_poly_sfw[[3]] +
  labs(title = "Homoscedasticity: Solid Food Waste", subtitle = "")

ass_poly_lfw[[3]] +
  labs(title = "Homoscedasticity: Liquid Food Waste", subtitle = "")

# 3.2 Breusch-Pagan test (studentized, per the printed test name)
lmtest::bptest(rdt_poly_fw)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_poly_fw
## BP = 16.435, df = 11, p-value = 0.1257
lmtest::bptest(rdt_poly_sfw)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_poly_sfw
## BP = 11.26, df = 11, p-value = 0.4218
lmtest::bptest(rdt_poly_lfw)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_poly_lfw
## BP = 16.008, df = 11, p-value = 0.1408
# 4. No influential points (outliers) - fourth check_model panel
ass_poly_fw[[4]] +
  labs(title = "Outliers: Food Waste", subtitle = "")

ass_poly_sfw[[4]] +
  labs(title = "Outliers: Solid Food Waste", subtitle = "")

ass_poly_lfw[[4]] +
  labs(title = "Outliers: Liquid Food Waste", subtitle = "")

# 5. No multicollinearity - fifth check_model panel (VIF)
ass_poly_fw[[5]] +
  labs(title = "VIF: Food Waste", subtitle = "")

ass_poly_sfw[[5]] +
  labs(title = "VIF: Solid Food Waste", subtitle = "")

ass_poly_lfw[[5]] +
  labs(title = "VIF: Liquid Food Waste", subtitle = "")

# 6. Independence of the observations
# Autocorrelation of the residuals
check_autocorrelation(rdt_poly_fw)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.490).
check_autocorrelation(rdt_poly_sfw)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.650).
check_autocorrelation(rdt_poly_lfw)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.490).

Per Customer

Interaction
################ Interaction

# simple food waste per customer -----
# Formula: food_waste_p_kg (food_waste_kg / customers, from preprocessing)
# on container, time and their interaction.
rdt_int_fw_p <-
  food_waste_p_kg ~ container * time

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_int_fw_p <- df %>%
  filter(!is_closed) %>%
  lm(rdt_int_fw_p, data = .)

summary(rdt_int_fw_p)
## 
## Call:
## lm(formula = rdt_int_fw_p, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.085830 -0.026913 -0.000256  0.022668  0.144070 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.0634025  0.0084860   7.471 5.19e-12 ***
## container       0.0205965  0.0123107   1.673   0.0963 .  
## time           -0.0002550  0.0001694  -1.505   0.1344    
## container:time  0.0002948  0.0002683   1.099   0.2736    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03901 on 157 degrees of freedom
## Multiple R-squared:  0.03369,    Adjusted R-squared:  0.01523 
## F-statistic: 1.825 on 3 and 157 DF,  p-value: 0.1449
# simple solid food waste per customer -----
# Formula: solid_waste_p_kg (solid_waste_kg / customers, from preprocessing)
# on container, time and their interaction.
rdt_int_sfw_p <-
  solid_waste_p_kg ~ container * time

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_int_sfw_p <- df %>%
  filter(!is_closed) %>%
  lm(rdt_int_sfw_p, data = .)

summary(rdt_int_sfw_p)
## 
## Call:
## lm(formula = rdt_int_sfw_p, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.025637 -0.009649 -0.001998  0.007429  0.107324 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.413e-02  3.438e-03   7.018 6.35e-11 ***
## container       6.087e-04  4.988e-03   0.122    0.903    
## time           -6.278e-05  6.865e-05  -0.914    0.362    
## container:time  7.849e-05  1.087e-04   0.722    0.471    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0158 on 157 degrees of freedom
## Multiple R-squared:  0.00791,    Adjusted R-squared:  -0.01105 
## F-statistic: 0.4173 on 3 and 157 DF,  p-value: 0.7408
# simple liquid food waste per customer -----
# Formula: liquid_waste_p_kg (liquid_waste_kg / customers, from
# preprocessing) on container, time and their interaction.
rdt_int_lfw_p <-
  liquid_waste_p_kg ~ container * time

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_int_lfw_p <- df %>%
  filter(!is_closed) %>%
  lm(rdt_int_lfw_p, data = .)

summary(rdt_int_lfw_p)
## 
## Call:
## lm(formula = rdt_int_lfw_p, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.060368 -0.021770  0.000354  0.019225  0.067531 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.0392726  0.0060700   6.470 1.19e-09 ***
## container       0.0199878  0.0088058   2.270   0.0246 *  
## time           -0.0001922  0.0001212  -1.586   0.1148    
## container:time  0.0002163  0.0001919   1.127   0.2615    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0279 on 157 degrees of freedom
## Multiple R-squared:  0.06328,    Adjusted R-squared:  0.04538 
## F-statistic: 3.535 on 3 and 157 DF,  p-value: 0.01621

Assumption checks: Interaction model

  1. Linearity of the relationships between the dependent and independent variables
  2. Normality of the residuals
  3. Homoscedasticity of the residuals
  4. No influential points (outliers)
  5. No multicollinearity
  6. Independence of the observations
library(performance)
# Diagnostic panels for the three per-customer interaction fits. Each result
# is indexed with [[ ]] further down to show one panel at a time.
ass_int_fw_p <- plot(
  check_model(rdt_int_fw_p, detrend = FALSE, panel = FALSE)
)
ass_int_sfw_p <- plot(
  check_model(rdt_int_sfw_p, detrend = FALSE, panel = FALSE)
)
ass_int_lfw_p <- plot(
  check_model(rdt_int_lfw_p, detrend = FALSE, panel = FALSE)
)

# 1. Linearity of the relationships between the dependent and independent variables
# 1.1 residuals vs fitted values
ass_int_fw_p[[2]] +
  labs(title = "Linearity: Food Waste", subtitle = "")

ass_int_sfw_p[[2]] +
  labs(title = "Linearity: Solid Food Waste", subtitle = "")

ass_int_lfw_p[[2]] +
  labs(title = "Linearity: Liquid Food Waste", subtitle = "")

# 1.2 error-variance (heteroscedasticity) check
check_heteroscedasticity(rdt_int_fw_p)
## OK: Error variance appears to be homoscedastic (p = 0.560).
check_heteroscedasticity(rdt_int_sfw_p)
## OK: Error variance appears to be homoscedastic (p = 0.085).
check_heteroscedasticity(rdt_int_lfw_p)
## OK: Error variance appears to be homoscedastic (p = 0.357).
# 2. Normality of the residuals
# 2.1 density plot of residuals
# Normality of Residuals: Food Waste
plot(
  check_normality(rdt_int_fw_p),
  type = "density"
) +
  labs(title = "Normality of Residuals: Food Waste", subtitle = "")

# Normality of Residuals: Solid Food Waste
plot(
  check_normality(rdt_int_sfw_p),
  type = "density"
) +
  labs(title = "Normality of Residuals: Solid Food Waste", subtitle = "")

# Normality of Residuals: Liquid Food Waste
plot(
  check_normality(rdt_int_lfw_p),
  type = "density"
) +
  labs(title = "Normality of Residuals: Liquid Food Waste", subtitle = "")

# 2.2 QQ plots of residuals (sixth check_model panel)
ass_int_fw_p[[6]] +
  labs(title = "QQ Plot of Residuals: Food Waste", subtitle = "")

ass_int_sfw_p[[6]] +
  labs(title = "QQ Plot of Residuals: Solid Food Waste", subtitle = "")

ass_int_lfw_p[[6]] +
  labs(title = "QQ Plot of Residuals: Liquid Food Waste", subtitle = "")

# 2.3 shapiro-wilk normality test
check_normality(rdt_int_fw_p)
## OK: residuals appear as normally distributed (p = 0.310).
check_normality(rdt_int_sfw_p)
## Warning: Non-normality of residuals detected (p < .001).
check_normality(rdt_int_lfw_p)
## OK: residuals appear as normally distributed (p = 0.506).
# 3. Homoscedasticity of the residuals
# 3.1 residual spread plots (third check_model panel)
ass_int_fw_p[[3]] +
  labs(title = "Homoscedasticity: Food Waste", subtitle = "")

ass_int_sfw_p[[3]] +
  labs(title = "Homoscedasticity: Solid Food Waste", subtitle = "")

ass_int_lfw_p[[3]] +
  labs(title = "Homoscedasticity: Liquid Food Waste", subtitle = "")

# 3.2 Breusch-Pagan test (studentized, per the printed test name)
lmtest::bptest(rdt_int_fw_p)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_int_fw_p
## BP = 0.39927, df = 3, p-value = 0.9404
lmtest::bptest(rdt_int_sfw_p)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_int_sfw_p
## BP = 0.78557, df = 3, p-value = 0.8529
lmtest::bptest(rdt_int_lfw_p)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_int_lfw_p
## BP = 1.5608, df = 3, p-value = 0.6683
# 4. No influential points (outliers) - fourth check_model panel
ass_int_fw_p[[4]] +
  labs(title = "Outliers: Food Waste", subtitle = "")

ass_int_sfw_p[[4]] +
  labs(title = "Outliers: Solid Food Waste", subtitle = "")

ass_int_lfw_p[[4]] +
  labs(title = "Outliers: Liquid Food Waste", subtitle = "")

# 5. No multicollinearity - fifth check_model panel (VIF)
ass_int_fw_p[[5]] +
  labs(title = "VIF: Food Waste", subtitle = "")

ass_int_sfw_p[[5]] +
  labs(title = "VIF: Solid Food Waste", subtitle = "")

ass_int_lfw_p[[5]] +
  labs(title = "VIF: Liquid Food Waste", subtitle = "")

# 6. Independence of the observations
# Autocorrelation of the residuals
check_autocorrelation(rdt_int_fw_p)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.644).
check_autocorrelation(rdt_int_sfw_p)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.884).
check_autocorrelation(rdt_int_lfw_p)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.646).
Multiple model
## Multiple model ----
# multi food waste per customer -----
# Formula: food_waste_p_kg (food_waste_kg / customers, from preprocessing)
# on container, time and their interaction, plus temp_c, humi_p, prcp_mm,
# liquors, sales and halfs as additive terms.
rdt_multi_fw_p <-
  food_waste_p_kg ~ container * time +
    temp_c + humi_p + prcp_mm +
    liquors + sales + halfs

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_multi_fw_p <- df %>%
  filter(!is_closed) %>%
  lm(rdt_multi_fw_p, data = .)

summary(rdt_multi_fw_p)
## 
## Call:
## lm(formula = rdt_multi_fw_p, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.074837 -0.024496 -0.002514  0.019536  0.148986 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     2.272e-02  3.556e-02   0.639   0.5239  
## container       2.445e-02  1.382e-02   1.768   0.0790 .
## time           -2.128e-04  2.480e-04  -0.858   0.3921  
## temp_c         -1.980e-04  4.767e-04  -0.415   0.6785  
## humi_p          9.816e-05  3.502e-04   0.280   0.7796  
## prcp_mm        -1.753e-03  1.524e-03  -1.150   0.2519  
## liquors         1.114e-03  1.918e-03   0.581   0.5624  
## sales           4.082e-05  1.911e-05   2.135   0.0343 *
## halfs           7.092e-04  1.107e-03   0.641   0.5227  
## container:time  1.984e-04  3.784e-04   0.524   0.6008  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03793 on 151 degrees of freedom
## Multiple R-squared:  0.1212, Adjusted R-squared:  0.06883 
## F-statistic: 2.314 on 9 and 151 DF,  p-value: 0.01819
# multi solid food waste per customer -----
# Formula: solid_waste_p_kg (solid_waste_kg / customers, from preprocessing)
# on container, time and their interaction, plus temp_c, humi_p, prcp_mm,
# liquors, sales and halfs as additive terms.
rdt_multi_sfw_p <-
  solid_waste_p_kg ~ container * time +
    temp_c + humi_p + prcp_mm +
    liquors + sales + halfs

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_multi_sfw_p <- df %>%
  filter(!is_closed) %>%
  lm(rdt_multi_sfw_p, data = .)

summary(rdt_multi_sfw_p)
## 
## Call:
## lm(formula = rdt_multi_sfw_p, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.027494 -0.010028 -0.001538  0.007102  0.104802 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)     1.945e-02  1.477e-02   1.317    0.190
## container       4.123e-03  5.743e-03   0.718    0.474
## time           -7.996e-05  1.030e-04  -0.776    0.439
## temp_c         -1.554e-04  1.980e-04  -0.785    0.434
## humi_p         -3.292e-05  1.455e-04  -0.226    0.821
## prcp_mm        -7.440e-04  6.329e-04  -1.175    0.242
## liquors         6.531e-04  7.967e-04   0.820    0.414
## sales           9.221e-06  7.940e-06   1.161    0.247
## halfs          -2.701e-04  4.599e-04  -0.587    0.558
## container:time  4.472e-05  1.572e-04   0.285    0.776
## 
## Residual standard error: 0.01576 on 151 degrees of freedom
## Multiple R-squared:  0.05157,    Adjusted R-squared:  -0.00496 
## F-statistic: 0.9123 on 9 and 151 DF,  p-value: 0.5163
# multi liquid food waste per customer -----
# Formula: liquid_waste_p_kg (liquid_waste_kg / customers, from
# preprocessing) on container, time and their interaction, plus temp_c,
# humi_p, prcp_mm, liquors, sales and halfs as additive terms.
rdt_multi_lfw_p <-
  liquid_waste_p_kg ~ container * time +
    temp_c + humi_p + prcp_mm +
    liquors + sales + halfs

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_multi_lfw_p <- df %>%
  filter(!is_closed) %>%
  lm(rdt_multi_lfw_p, data = .)

summary(rdt_multi_lfw_p)
## 
## Call:
## lm(formula = rdt_multi_lfw_p, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.051511 -0.017780 -0.004288  0.016169  0.078517 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     3.269e-03  2.512e-02   0.130   0.8966  
## container       2.032e-02  9.764e-03   2.081   0.0391 *
## time           -1.329e-04  1.752e-04  -0.759   0.4493  
## temp_c         -4.256e-05  3.367e-04  -0.126   0.8996  
## humi_p          1.311e-04  2.474e-04   0.530   0.5969  
## prcp_mm        -1.009e-03  1.076e-03  -0.937   0.3502  
## liquors         4.605e-04  1.355e-03   0.340   0.7344  
## sales           3.160e-05  1.350e-05   2.340   0.0206 *
## halfs           9.793e-04  7.819e-04   1.252   0.2124  
## container:time  1.537e-04  2.673e-04   0.575   0.5661  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02679 on 151 degrees of freedom
## Multiple R-squared:  0.1694, Adjusted R-squared:  0.1198 
## F-statistic: 3.421 on 9 and 151 DF,  p-value: 0.0007411

Assumption checks: Multiple model

  1. Linearity of the relationships between the dependent and independent variables
  2. Normality of the residuals
  3. Homoscedasticity of the residuals
  4. No influential points (outliers)
  5. No multicollinearity
  6. Independence of the observations
library(performance)
# Diagnostic panels for the three per-customer multiple-regression fits. Each
# result is indexed with [[ ]] further down to show one panel at a time.
ass_multi_fw_p <- plot(
  check_model(rdt_multi_fw_p, detrend = FALSE, panel = FALSE)
)
ass_multi_sfw_p <- plot(
  check_model(rdt_multi_sfw_p, detrend = FALSE, panel = FALSE)
)
ass_multi_lfw_p <- plot(
  check_model(rdt_multi_lfw_p, detrend = FALSE, panel = FALSE)
)

# 1. Linearity of the relationships between the dependent and independent variables
# 1.1 residuals vs fitted values
ass_multi_fw_p[[2]] +
  labs(title = "Linearity: Food Waste", subtitle = "")

ass_multi_sfw_p[[2]] +
  labs(title = "Linearity: Solid Food Waste", subtitle = "")

ass_multi_lfw_p[[2]] +
  labs(title = "Linearity: Liquid Food Waste", subtitle = "")

# 1.2 error-variance (heteroscedasticity) check
check_heteroscedasticity(rdt_multi_fw_p)
## OK: Error variance appears to be homoscedastic (p = 0.077).
check_heteroscedasticity(rdt_multi_sfw_p)
## OK: Error variance appears to be homoscedastic (p = 0.053).
check_heteroscedasticity(rdt_multi_lfw_p)
## OK: Error variance appears to be homoscedastic (p = 0.334).
# 2. Normality of the residuals
# 2.1 density plot of residuals
# Normality of Residuals: Food Waste
plot(
  check_normality(rdt_multi_fw_p),
  type = "density"
) +
  labs(title = "Normality of Residuals: Food Waste", subtitle = "")

# Normality of Residuals: Solid Food Waste
plot(
  check_normality(rdt_multi_sfw_p),
  type = "density"
) +
  labs(title = "Normality of Residuals: Solid Food Waste", subtitle = "")

# Normality of Residuals: Liquid Food Waste
plot(
  check_normality(rdt_multi_lfw_p),
  type = "density"
) +
  labs(title = "Normality of Residuals: Liquid Food Waste", subtitle = "")

# 2.2 QQ plots of residuals (sixth check_model panel)
ass_multi_fw_p[[6]] +
  labs(title = "QQ Plot of Residuals: Food Waste", subtitle = "")

ass_multi_sfw_p[[6]] +
  labs(title = "QQ Plot of Residuals: Solid Food Waste", subtitle = "")

ass_multi_lfw_p[[6]] +
  labs(title = "QQ Plot of Residuals: Liquid Food Waste", subtitle = "")

# 2.3 shapiro-wilk normality test
# Test the per-customer fits of this section (rdt_multi_*_p). The calls used
# to point at the absolute-kg models rdt_multi_fw / rdt_multi_sfw /
# rdt_multi_lfw, so the output recorded here only repeated the results of the
# earlier absolute-kg section. Re-knit the document to print the results for
# the per-customer models.
check_normality(rdt_multi_fw_p)
check_normality(rdt_multi_sfw_p)
check_normality(rdt_multi_lfw_p)
# 3. Homoscedasticity of the residuals
# 3.1 residual spread plots (third check_model panel)
ass_multi_fw_p[[3]] +
  labs(title = "Homoscedasticity: Food Waste", subtitle = "")

ass_multi_sfw_p[[3]] +
  labs(title = "Homoscedasticity: Solid Food Waste", subtitle = "")

ass_multi_lfw_p[[3]] +
  labs(title = "Homoscedasticity: Liquid Food Waste", subtitle = "")

# 3.2 Breusch-Pagan test (studentized, per the printed test name)
lmtest::bptest(rdt_multi_fw_p)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_multi_fw_p
## BP = 15.097, df = 9, p-value = 0.08831
lmtest::bptest(rdt_multi_sfw_p)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_multi_sfw_p
## BP = 10.355, df = 9, p-value = 0.3225
lmtest::bptest(rdt_multi_lfw_p)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_multi_lfw_p
## BP = 13.732, df = 9, p-value = 0.1322
# 4. No influential points (outliers) - fourth check_model panel
ass_multi_fw_p[[4]] +
  labs(title = "Outliers: Food Waste", subtitle = "")

ass_multi_sfw_p[[4]] +
  labs(title = "Outliers: Solid Food Waste", subtitle = "")

ass_multi_lfw_p[[4]] +
  labs(title = "Outliers: Liquid Food Waste", subtitle = "")

# 5. No multicollinearity - fifth check_model panel (VIF)
ass_multi_fw_p[[5]] +
  labs(title = "VIF: Food Waste", subtitle = "")

ass_multi_sfw_p[[5]] +
  labs(title = "VIF: Solid Food Waste", subtitle = "")

ass_multi_lfw_p[[5]] +
  labs(title = "VIF: Liquid Food Waste", subtitle = "")

# 6. Independence of the observations
# Autocorrelation of the residuals
check_autocorrelation(rdt_multi_fw_p)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.946).
check_autocorrelation(rdt_multi_sfw_p)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.824).
check_autocorrelation(rdt_multi_lfw_p)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.938).
Polynomial model
# poly- food waste per customer -----
# Formula: food_waste_p_kg (food_waste_kg / customers, from preprocessing)
# on container crossed with both time and I(time^2), plus temp_c, humi_p,
# prcp_mm, liquors, sales and halfs as additive terms.
rdt_poly_fw_p <-
  food_waste_p_kg ~ container * time +
    container * I(time^2) +
    temp_c + humi_p + prcp_mm +
    liquors + sales + halfs

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_poly_fw_p <- df %>%
  filter(!is_closed) %>%
  lm(rdt_poly_fw_p, data = .)

summary(rdt_poly_fw_p)
## 
## Call:
## lm(formula = rdt_poly_fw_p, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07797 -0.02446 -0.00292  0.02073  0.14856 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          1.857e-02  3.821e-02   0.486   0.6277  
## container            1.680e-02  1.965e-02   0.855   0.3940  
## time                -3.906e-04  7.030e-04  -0.556   0.5793  
## I(time^2)           -2.359e-06  7.623e-06  -0.309   0.7574  
## temp_c              -1.356e-04  4.821e-04  -0.281   0.7789  
## humi_p               1.238e-04  3.544e-04   0.349   0.7273  
## prcp_mm             -2.078e-03  1.562e-03  -1.330   0.1855  
## liquors              1.201e-03  1.927e-03   0.623   0.5339  
## sales                4.164e-05  1.925e-05   2.163   0.0321 *
## halfs                6.986e-04  1.117e-03   0.625   0.5327  
## container:time       1.170e-03  1.106e-03   1.058   0.2919  
## container:I(time^2) -8.306e-06  1.306e-05  -0.636   0.5257  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03805 on 149 degrees of freedom
## Multiple R-squared:  0.1274, Adjusted R-squared:  0.06295 
## F-statistic: 1.977 on 11 and 149 DF,  p-value: 0.0344
# poly- solid food waste per customer -----
# Formula: solid_waste_p_kg (solid_waste_kg / customers, from preprocessing)
# on container crossed with both time and I(time^2), plus temp_c, humi_p,
# prcp_mm, liquors, sales and halfs as additive terms.
rdt_poly_sfw_p <-
  solid_waste_p_kg ~ container * time +
    container * I(time^2) +
    temp_c + humi_p + prcp_mm +
    liquors + sales + halfs

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_poly_sfw_p <- df %>%
  filter(!is_closed) %>%
  lm(rdt_poly_sfw_p, data = .)

summary(rdt_poly_sfw_p)
## 
## Call:
## lm(formula = rdt_poly_sfw_p, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.027962 -0.010345 -0.002081  0.006764  0.102708 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)          1.199e-02  1.582e-02   0.758    0.450
## container            6.501e-03  8.136e-03   0.799    0.426
## time                -4.329e-04  2.910e-04  -1.487    0.139
## I(time^2)           -4.166e-06  3.156e-06  -1.320    0.189
## temp_c              -1.347e-04  1.996e-04  -0.675    0.501
## humi_p              -5.413e-06  1.467e-04  -0.037    0.971
## prcp_mm             -8.951e-04  6.468e-04  -1.384    0.168
## liquors              7.133e-04  7.977e-04   0.894    0.373
## sales                1.022e-05  7.970e-06   1.282    0.202
## halfs               -3.289e-04  4.625e-04  -0.711    0.478
## container:time       6.071e-04  4.580e-04   1.326    0.187
## container:I(time^2)  1.443e-06  5.406e-06   0.267    0.790
## 
## Residual standard error: 0.01575 on 149 degrees of freedom
## Multiple R-squared:  0.06457,    Adjusted R-squared:  -0.004489 
## F-statistic: 0.935 on 11 and 149 DF,  p-value: 0.5087
# poly- liquid food waste per customer -----
# Formula: liquid_waste_p_kg (liquid_waste_kg / customers, from
# preprocessing) on container crossed with both time and I(time^2), plus
# temp_c, humi_p, prcp_mm, liquors, sales and halfs as additive terms.
rdt_poly_lfw_p <-
  liquid_waste_p_kg ~ container * time +
    container * I(time^2) +
    temp_c + humi_p + prcp_mm +
    liquors + sales + halfs

# Fit on the rows that pass `!is_closed`; the fitted model replaces the
# formula stored under the same name.
rdt_poly_lfw_p <- df %>%
  filter(!is_closed) %>%
  lm(rdt_poly_lfw_p, data = .)

summary(rdt_poly_lfw_p)
## 
## Call:
## lm(formula = rdt_poly_lfw_p, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.050007 -0.018305 -0.003888  0.017097  0.076798 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          6.582e-03  2.697e-02   0.244   0.8076  
## container            1.030e-02  1.387e-02   0.742   0.4590  
## time                 4.226e-05  4.962e-04   0.085   0.9322  
## I(time^2)            1.808e-06  5.381e-06   0.336   0.7374  
## temp_c              -8.461e-07  3.403e-04  -0.002   0.9980  
## humi_p               1.292e-04  2.502e-04   0.517   0.6062  
## prcp_mm             -1.183e-03  1.103e-03  -1.073   0.2851  
## liquors              4.880e-04  1.360e-03   0.359   0.7203  
## sales                3.143e-05  1.359e-05   2.313   0.0221 *
## halfs                1.028e-03  7.887e-04   1.303   0.1947  
## container:time       5.629e-04  7.809e-04   0.721   0.4721  
## container:I(time^2) -9.749e-06  9.218e-06  -1.058   0.2920  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02686 on 149 degrees of freedom
## Multiple R-squared:  0.1761, Adjusted R-squared:  0.1152 
## F-statistic: 2.895 on 11 and 149 DF,  p-value: 0.00176

Assumption Checks: Polynomial Models

  1. Linearity of the relationships between the dependent and independent variables
  2. Normality of the residuals
  3. Homoscedasticity of the residuals
  4. No influential points (outliers)
  5. No multicollinearity
  6. Independence of the observations
library(performance)
# Build the check_model() diagnostic plots once per polynomial per-customer
# model (total, solid, liquid food waste). The results are indexed by position
# ([[2]] ... [[6]]) in the assumption checks that follow.
# NOTE(review): presumably panel = FALSE is what keeps the diagnostics as
# separate list elements instead of one combined figure, and the element order
# depends on the installed performance/see versions - confirm before relying
# on the positional indices.
# rdt_poly_fw_p is fitted earlier in the file.
ass_poly_fw_p  <- plot(check_model(rdt_poly_fw_p, detrend=FALSE,panel = FALSE))
ass_poly_sfw_p <- plot(check_model(rdt_poly_sfw_p, detrend=FALSE,panel = FALSE))
ass_poly_lfw_p <- plot(check_model(rdt_poly_lfw_p, detrend=FALSE,panel = FALSE))

# 1. Linearity of the relationships between the dependent and independent variables
# 1.1 plot residual vs fitted values
# Element 2 of each check_model() plot list is used as the linearity panel.
ass_poly_fw_p[[2]]  + labs(title = "Linearity: Food Waste", subtitle = "")

ass_poly_sfw_p[[2]] + labs(title = "Linearity: Solid Food Waste", subtitle = "")

ass_poly_lfw_p[[2]] + labs(title = "Linearity: Liquid Food Waste", subtitle = "")

# 1.2 check_heteroscedasticity(): tests for constant error variance, so it
# belongs with assumption 3 (homoscedasticity); it is not a linearity test.
# NOTE(review): for the solid-waste model this reports heteroscedasticity
# (p = 0.004) while lmtest::bptest() in section 3.2 does not (p = 0.3979).
# Presumably the two calls use different variants of the Breusch-Pagan test
# (studentized vs. not) - confirm and report only one of them.
check_heteroscedasticity(rdt_poly_fw_p)
## OK: Error variance appears to be homoscedastic (p = 0.107).
check_heteroscedasticity(rdt_poly_sfw_p)
## Warning: Heteroscedasticity (non-constant error variance) detected (p = 0.004).
check_heteroscedasticity(rdt_poly_lfw_p)
## OK: Error variance appears to be homoscedastic (p = 0.321).
# 2. Normality of the residuals
# 2.1 density plot of residuals (type = "density"), one per model
# Normality of Residuals: Food Waste
plot(check_normality(rdt_poly_fw_p), type = "density") + 
  labs(title = "Normality of Residuals: Food Waste", subtitle = "")

# Normality of Residuals: Solid Food Waste
plot(check_normality(rdt_poly_sfw_p), type = "density") + 
  labs(title = "Normality of Residuals: Solid Food Waste", subtitle = "")

# Normality of Residuals: Liquid Food Waste
plot(check_normality(rdt_poly_lfw_p), type = "density") + 
  labs(title = "Normality of Residuals: Liquid Food Waste", subtitle = "")

# 2.2 QQ plot of residuals
# Element 6 of each check_model() plot list is used as the QQ panel.
ass_poly_fw_p[[6]]  + 
  labs(title = "QQ Plot of Residuals: Food Waste", subtitle = "")

ass_poly_sfw_p[[6]] + 
  labs(title = "QQ Plot of Residuals: Solid Food Waste", subtitle = "")

ass_poly_lfw_p[[6]] + 
  labs(title = "QQ Plot of Residuals: Liquid Food Waste", subtitle = "")

# 2.3 shapiro-wilk normality test
# All three models are flagged as having non-normal residuals at the 5% level.
check_normality(rdt_poly_fw_p)
## Warning: Non-normality of residuals detected (p = 0.012).
check_normality(rdt_poly_sfw_p)
## Warning: Non-normality of residuals detected (p < .001).
check_normality(rdt_poly_lfw_p)
## Warning: Non-normality of residuals detected (p = 0.023).
# 3. Homoscedasticity of the residuals
# 3.1 plot residuals
# Element 3 of each check_model() plot list is used as the homoscedasticity
# panel.
ass_poly_fw_p[[3]]  + 
  labs(title = "Homoscedasticity: Food Waste", subtitle = "")

ass_poly_sfw_p[[3]] + 
  labs(title = "Homoscedasticity: Solid Food Waste", subtitle = "")

ass_poly_lfw_p[[3]] + 
  labs(title = "Homoscedasticity: Liquid Food Waste", subtitle = "")

# 3.2 Breusch-Pagan test
# The output header shows this is the studentized variant. None of the three
# models rejects constant variance at the 5% level here.
# NOTE(review): check_heteroscedasticity() in section 1.2 gives p = 0.004 for
# the solid-waste model versus p = 0.3979 below - reconcile before reporting.
lmtest::bptest(rdt_poly_fw_p)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_poly_fw_p
## BP = 17.523, df = 11, p-value = 0.09332
lmtest::bptest(rdt_poly_sfw_p)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_poly_sfw_p
## BP = 11.556, df = 11, p-value = 0.3979
lmtest::bptest(rdt_poly_lfw_p)
## 
##  studentized Breusch-Pagan test
## 
## data:  rdt_poly_lfw_p
## BP = 16.922, df = 11, p-value = 0.1102
# 4. No influential points (outliers)
# Element 4 of each check_model() plot list is used as the outlier panel.
ass_poly_fw_p[[4]]  + labs(title = "Outliers: Food Waste", subtitle = "")

ass_poly_sfw_p[[4]] + labs(title = "Outliers: Solid Food Waste", subtitle = "")

ass_poly_lfw_p[[4]] + labs(title = "Outliers: Liquid Food Waste", subtitle = "")

# 5. No multicollinearity
# Element 5 of each check_model() plot list is used as the VIF panel.
# NOTE(review): the models contain time, I(time^2) and their interactions with
# container, so presumably high VIFs for those terms are structural rather
# than a data problem - confirm when reading these plots.
ass_poly_fw_p[[5]]  + labs(title = "VIF: Food Waste", subtitle = "")

ass_poly_sfw_p[[5]] + labs(title = "VIF: Solid Food Waste", subtitle = "")

ass_poly_lfw_p[[5]] + labs(title = "VIF: Liquid Food Waste", subtitle = "")

# 6. Independence of the observations
# Autocorrelation
# check_autocorrelation() is run on each polynomial per-customer model; none of
# the three is flagged (all p > 0.8).
check_autocorrelation(rdt_poly_fw_p)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.976).
check_autocorrelation(rdt_poly_sfw_p)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.822).
check_autocorrelation(rdt_poly_lfw_p)
## OK: Residuals appear to be independent and not autocorrelated (p = 0.914).

Local Regression

per Customer

library(purrr)
library(tidyr)
library(tidyverse)
## Warning: package 'stringr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)

# Half-width of the local window: only rows with |time| <= bandwidth are kept.
bandwidth <- 25
# Local regression: food waste per customer on container ----
# Open days inside the window, container as the only regressor.
loc_fw_p <- df %>%
  filter(!is_closed, abs(time) <= bandwidth) %>%
  lm(formula = food_waste_p_kg ~ container, data = .)
summary(loc_fw_p)
## 
## Call:
## lm(formula = food_waste_p_kg ~ container, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.071951 -0.021391  0.001999  0.021733  0.079921 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.066985   0.007257   9.230 2.69e-12 ***
## container   0.016731   0.010164   1.646    0.106    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03629 on 49 degrees of freedom
## Multiple R-squared:  0.0524, Adjusted R-squared:  0.03306 
## F-statistic: 2.709 on 1 and 49 DF,  p-value: 0.1062
# Local regression: solid food waste per customer on container ----
# Open days inside the window, container as the only regressor.
loc_sfw_p <- df %>%
  filter(!is_closed, abs(time) <= bandwidth) %>%
  lm(formula = solid_waste_p_kg ~ container, data = .)
summary(loc_sfw_p)
## 
## Call:
## lm(formula = solid_waste_p_kg ~ container, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.022725 -0.006960 -0.002115  0.007308  0.025352 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.022725   0.002242  10.135 1.29e-13 ***
## container   0.001804   0.003140   0.575    0.568    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01121 on 49 degrees of freedom
## Multiple R-squared:  0.006691,   Adjusted R-squared:  -0.01358 
## F-statistic: 0.3301 on 1 and 49 DF,  p-value: 0.5682
# Local regression: liquid food waste per customer on container ----
# Open days inside the window, container as the only regressor.
loc_lfw_p <- df %>%
  filter(!is_closed, abs(time) <= bandwidth) %>%
  lm(formula = liquid_waste_p_kg ~ container, data = .)
summary(loc_lfw_p)
## 
## Call:
## lm(formula = liquid_waste_p_kg ~ container, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.056246 -0.019655  0.000813  0.019476  0.068086 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.044261   0.005547   7.979 2.06e-10 ***
## container   0.014926   0.007769   1.921   0.0605 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02774 on 49 degrees of freedom
## Multiple R-squared:  0.07005,    Adjusted R-squared:  0.05107 
## F-statistic: 3.691 on 1 and 49 DF,  p-value: 0.06053
Interaction
# Half-width of the local window: only rows with |time| <= bandwidth are kept.
bandwidth <- 25

# Local regression with interaction: food waste per customer ----
# container * time lets the time slope differ between container levels.
# This overwrites the loc_fw_p fitted in the previous section.
loc_fw_p <- df %>%
  filter(!is_closed, abs(time) <= bandwidth) %>%
  lm(formula = food_waste_p_kg ~ container * time, data = .)
summary(loc_fw_p)
## 
## Call:
## lm(formula = food_waste_p_kg ~ container * time, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.076915 -0.019486 -0.000832  0.025310  0.076839 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.0552496  0.0151211   3.654 0.000649 ***
## container       0.0233299  0.0205930   1.133 0.263003    
## time           -0.0009027  0.0010172  -0.888 0.379327    
## container:time  0.0013136  0.0013979   0.940 0.352184    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03667 on 47 degrees of freedom
## Multiple R-squared:  0.07158,    Adjusted R-squared:  0.01232 
## F-statistic: 1.208 on 3 and 47 DF,  p-value: 0.3172
# Local regression with interaction: solid food waste per customer ----
# container * time lets the time slope differ between container levels.
# This overwrites the loc_sfw_p fitted in the previous section.
loc_sfw_p <- df %>%
  filter(!is_closed, abs(time) <= bandwidth) %>%
  lm(formula = solid_waste_p_kg ~ container * time, data = .)
summary(loc_sfw_p)
## 
## Call:
## lm(formula = solid_waste_p_kg ~ container * time, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.025961 -0.006713 -0.002136  0.008028  0.024176 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.0189002  0.0046765   4.042 0.000195 ***
## container       0.0054568  0.0063688   0.857 0.395900    
## time           -0.0002942  0.0003146  -0.935 0.354488    
## container:time  0.0003079  0.0004323   0.712 0.479863    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01134 on 47 degrees of freedom
## Multiple R-squared:  0.02488,    Adjusted R-squared:  -0.03736 
## F-statistic: 0.3997 on 3 and 47 DF,  p-value: 0.7538
# Local regression with interaction: liquid food waste per customer ----
# container * time lets the time slope differ between container levels.
# This overwrites the loc_lfw_p fitted in the previous section.
loc_lfw_p <- df %>%
  filter(!is_closed, abs(time) <= bandwidth) %>%
  lm(formula = liquid_waste_p_kg ~ container * time, data = .)
summary(loc_lfw_p)
## 
## Call:
## lm(formula = liquid_waste_p_kg ~ container * time, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.059985 -0.016844  0.000476  0.019769  0.065107 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     0.0363494  0.0115661   3.143   0.0029 **
## container       0.0178730  0.0157515   1.135   0.2623   
## time           -0.0006086  0.0007780  -0.782   0.4380   
## container:time  0.0010057  0.0010693   0.941   0.3517   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02805 on 47 degrees of freedom
## Multiple R-squared:  0.08762,    Adjusted R-squared:  0.02938 
## F-statistic: 1.504 on 3 and 47 DF,  p-value: 0.2256
# Visualization local regression ----
# Bandwidth sensitivity of the container estimate: refit the interaction model
# on open days with |time| <= bandwidth for every bandwidth from 6 to 30 and
# plot the `container` coefficient with its confidence interval.
locals <- tibble(bandwidth = seq(from = 6, to = 30, by = 1))
results_local <- locals %>%
  mutate(
    # The window is taken from the mapped argument `bw`. lm() evaluates
    # `subset` in `data` first and then in the formula's environment (this
    # function's frame), so `bw` is found there. This replaces a lambda that
    # ignored its argument and depended on group_by(bandwidth) to make the
    # `bandwidth` column length one; the result is therefore ungrouped.
    loc_reg_p = map(bandwidth, function(bw) {
      lm(food_waste_p_kg ~ container * time,
         data = subset(df, is_closed == FALSE),
         subset = abs(time) <= bw)
    }),
    tidied = map(loc_reg_p, tidy, conf.int = TRUE)
  ) %>%
  unnest(tidied) %>%
  filter(term == "container")

# geom_pointrange() already draws the point estimate, so no separate
# geom_point() layer is needed.
results_local %>%
  ggplot(aes(x = bandwidth, y = estimate,
             ymin = conf.low, ymax = conf.high)) +
  geom_pointrange() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  xlab("Band Width") + ylab("Estimated Effect") +
  ggtitle("Estimated Container Charge Effect on Food Waste with Interaction")

Multiple
# Half-width of the local window: only rows with |time| <= bandwidth are kept.
bandwidth <- 25

# Local regression with covariates: food waste per customer ----
# Interaction model plus the weather and sales covariates.
# This overwrites the loc_fw_p fitted in the previous section.
loc_fw_p <- df %>%
  filter(!is_closed, abs(time) <= bandwidth) %>%
  lm(
    formula = food_waste_p_kg ~ container * time +
      temp_c + humi_p + prcp_mm + liquors + sales + halfs,
    data = .
  )
summary(loc_fw_p)
## 
## Call:
## lm(formula = food_waste_p_kg ~ container * time + temp_c + humi_p + 
##     prcp_mm + liquors + sales + halfs, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.070198 -0.018478 -0.000523  0.021209  0.070930 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)     7.963e-02  7.727e-02   1.031    0.309
## container       1.269e-02  2.150e-02   0.590    0.558
## time           -7.400e-04  1.049e-03  -0.706    0.484
## temp_c          1.008e-03  8.579e-04   1.174    0.247
## humi_p         -4.189e-04  8.007e-04  -0.523    0.604
## prcp_mm        -3.606e-03  3.060e-03  -1.178    0.245
## liquors         8.516e-04  3.177e-03   0.268    0.790
## sales           3.606e-05  3.459e-05   1.043    0.303
## halfs           3.668e-04  2.245e-03   0.163    0.871
## container:time  1.555e-03  1.585e-03   0.981    0.332
## 
## Residual standard error: 0.03567 on 41 degrees of freedom
## Multiple R-squared:  0.2336, Adjusted R-squared:  0.06541 
## F-statistic: 1.389 on 9 and 41 DF,  p-value: 0.2247
# Local regression with covariates: solid food waste per customer ----
# Interaction model plus the weather and sales covariates.
# This overwrites the loc_sfw_p fitted in the previous section.
loc_sfw_p <- df %>%
  filter(!is_closed, abs(time) <= bandwidth) %>%
  lm(
    formula = solid_waste_p_kg ~ container * time +
      temp_c + humi_p + prcp_mm + liquors + sales + halfs,
    data = .
  )
summary(loc_sfw_p)
## 
## Call:
## lm(formula = solid_waste_p_kg ~ container * time + temp_c + humi_p + 
##     prcp_mm + liquors + sales + halfs, data = .)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0186022 -0.0056049 -0.0005597  0.0058769  0.0210676 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)     3.528e-02  2.293e-02   1.539   0.1315  
## container       1.389e-03  6.381e-03   0.218   0.8288  
## time           -2.229e-04  3.112e-04  -0.716   0.4778  
## temp_c          5.332e-04  2.546e-04   2.095   0.0424 *
## humi_p         -2.283e-04  2.376e-04  -0.961   0.3421  
## prcp_mm        -8.376e-04  9.079e-04  -0.923   0.3616  
## liquors         1.705e-04  9.426e-04   0.181   0.8573  
## sales           1.810e-05  1.026e-05   1.763   0.0853 .
## halfs          -4.264e-04  6.661e-04  -0.640   0.5257  
## container:time  3.030e-04  4.703e-04   0.644   0.5230  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01058 on 41 degrees of freedom
## Multiple R-squared:  0.2592, Adjusted R-squared:  0.09653 
## F-statistic: 1.594 on 9 and 41 DF,  p-value: 0.1495
# Local regression with covariates: liquid food waste per customer ----
# Interaction model plus the weather and sales covariates.
# This overwrites the loc_lfw_p fitted in the previous section.
loc_lfw_p <- df %>%
  filter(!is_closed, abs(time) <= bandwidth) %>%
  lm(
    formula = liquid_waste_p_kg ~ container * time +
      temp_c + humi_p + prcp_mm + liquors + sales + halfs,
    data = .
  )
summary(loc_lfw_p)
## 
## Call:
## lm(formula = liquid_waste_p_kg ~ container * time + temp_c + 
##     humi_p + prcp_mm + liquors + sales + halfs, data = .)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.051596 -0.015189 -0.001578  0.016023  0.063417 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)     4.436e-02  6.054e-02   0.733    0.468
## container       1.130e-02  1.685e-02   0.671    0.506
## time           -5.171e-04  8.218e-04  -0.629    0.533
## temp_c          4.743e-04  6.722e-04   0.706    0.484
## humi_p         -1.905e-04  6.274e-04  -0.304    0.763
## prcp_mm        -2.768e-03  2.398e-03  -1.155    0.255
## liquors         6.810e-04  2.489e-03   0.274    0.786
## sales           1.797e-05  2.710e-05   0.663    0.511
## halfs           7.931e-04  1.759e-03   0.451    0.654
## container:time  1.252e-03  1.242e-03   1.008    0.319
## 
## Residual standard error: 0.02795 on 41 degrees of freedom
## Multiple R-squared:  0.2097, Adjusted R-squared:  0.03621 
## F-statistic: 1.209 on 9 and 41 DF,  p-value: 0.3161
# Visualization ----
# Local regression with multiple
# Bandwidth sensitivity of the container estimate for the covariate-adjusted
# model: one fit per bandwidth in `locals`, using open days with
# |time| <= bandwidth, keeping only the `container` coefficient.
results_local_multi <- locals %>%
  mutate(
    # The window is taken from the mapped argument `bw`. lm() evaluates
    # `subset` in `data` first and then in the formula's environment (this
    # function's frame), so `bw` is found there. This replaces a lambda that
    # ignored its argument and depended on group_by(bandwidth) to make the
    # `bandwidth` column length one; the result is therefore ungrouped.
    loc_reg_p = map(bandwidth, function(bw) {
      lm(food_waste_p_kg ~ container * time
         + temp_c + humi_p + prcp_mm
         + liquors + sales + halfs,
         data = subset(df, is_closed == FALSE),
         subset = abs(time) <= bw)
    }),
    tidied = map(loc_reg_p, tidy, conf.int = TRUE)
  ) %>%
  unnest(tidied) %>%
  filter(term == "container")

# geom_pointrange() already draws the point estimate, so no separate
# geom_point() layer is needed.
results_local_multi %>%
  ggplot(aes(x = bandwidth, y = estimate,
             ymin = conf.low, ymax = conf.high)) +
  geom_pointrange() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  xlab("Band Width") + ylab("Estimated Effect") +
  ggtitle("Estimated Container Charge Effect on Food Waste with Multiple Model")

# Donut regression with multiple
# Robustness check that EXCLUDES the window around time = 0: for each bandwidth
# in `locals`, refit the covariate-adjusted model on open days with
# |time| > bandwidth and keep only the `container` coefficient.
results_local_multi_donut <- locals %>%
  mutate(
    # The excluded half-width is taken from the mapped argument `bw`. lm()
    # evaluates `subset` in `data` first and then in the formula's environment
    # (this function's frame), so `bw` is found there. This replaces a lambda
    # that ignored its argument and depended on group_by(bandwidth) to make
    # the `bandwidth` column length one; the result is therefore ungrouped.
    loc_reg_p = map(bandwidth, function(bw) {
      lm(food_waste_p_kg ~ container * time
         + temp_c + humi_p + prcp_mm
         + liquors + sales + halfs,
         data = subset(df, is_closed == FALSE),
         subset = abs(time) > bw)
    }),
    tidied = map(loc_reg_p, tidy, conf.int = TRUE)
  ) %>%
  unnest(tidied) %>%
  filter(term == "container")

# Title and x label name the donut explicitly; the previous title was identical
# to the non-donut plot, making the two figures indistinguishable.
# geom_pointrange() already draws the point estimate, so no separate
# geom_point() layer is needed.
results_local_multi_donut %>%
  ggplot(aes(x = bandwidth, y = estimate,
             ymin = conf.low, ymax = conf.high)) +
  geom_pointrange() +
  geom_hline(yintercept = 0, linetype = "dashed") +
  xlab("Excluded Band Width (Donut Hole)") + ylab("Estimated Effect") +
  ggtitle("Estimated Container Charge Effect on Food Waste with Multiple Model (Donut)")